# Replication file for: "Beyond the Hazard Ratio: Generating        #
# Expected Durations from the Cox Proportional Hazards Model"       #
#                                                                   #
# Jonathan Kropko                                                   #
# University of Virginia                                            #
# jkropko@virginia.edu                                              #
#                                                                   #
# Jeffrey J. Harden                                                 #
# University of Notre Dame                                          #
# jeff.harden@nd.edu                                                #
#                                                                   #
# cox.ed.fnp() function (NPSF approach)                             #
# This function computes the expected durations with a discrete     #
# duration cloglog model using the observed value method for        # 
# setting independent variable profiles (see Hanmer and Kalkan      #
# 2013).                                                            #
# Last update: May 20, 2017                                         #
#####################################################################
### Arguments ###
# cox.model: A coxph() object.
# var.name: Name of the independent variable of interest.
# val1: Value of var.name for new observation #1.
# val2: Value of var.name for new observation #2.
# m: Number of bootstrap repetitions. The default is 1000.
# ci: Confidence interval level for the expected durations. The default is 95%.
# cluster: Cluster variable if bootstrapping is to be done by clusters of observations rather than individual observations. 
# var.name2: Name of a second independent variable to set (constituent term in an interaction). The default is NA.
# val3: Value for var.name2. The default is NA.
# var.name3: Name of a third independent variable to set (interaction term). The default is NA.
# val4: Value of var.name3 for new observation #1. The default is NA.
# val5: Value of var.name3 for new observation #2. The default is NA.
# verbose: Should the program print updates of bootstrap iterations? The default is TRUE.

### Outputs ###
# exp.dur: Expected duration for each observation.
# ed.pe: Expected duration point estimates for the two new observations.
# ed.se: Expected duration standard errors for the two new observations.
# ed.lo: Expected duration lower confidence bounds for the two new observations.
# ed.hi: Expected duration upper confidence bounds for the two new observations.
# diff.pe: First difference in expected duration point estimates for the two new observations.
# diff.se: First difference in expected duration standard errors for the two new observations.
# diff.lo: First difference in expected duration lower confidence bounds for the two new observations.
# diff.hi: First difference in expected duration upper confidence bounds for the two new observations.

### FNP Function ###

fnp <- function(coef, data, y, failed, var.name, val1, val2, var.name2 = NA, val3 = NA, var.name3 = NA, val4 = NA, val5 = NA, verbose = TRUE) {

  data <- na.omit(data)
  y <- na.omit(y)
  failed <- na.omit(failed)
  
  mn <- mean(data%*%coef)
  exp.xb <- exp(data%*%coef-mn)
  
  # Compile total failures (only non-censored) at each time point
  h <- cbind(y,failed,exp.xb)
  h <- h[order(h[,1]),]
  h <- aggregate(h[,-1], by=list(h[,1]), FUN="sum")  
  colnames(h) <- c("time", "total.failures", "exp.xb")
  
  # Construction of the risk set (includes censored and non-censored observations)
  h[,3] <- rev(cumsum(rev(h[,3])))
  
  #Construct CBH, baseline survivor and failure CDF
  CBH <- cumsum(h[,2]/h[,3])   
  S.bl <- exp(-CBH)
  
  #Overall expected duration  
  expect.bl <- sum(diff(h[,1])*S.bl[-1])  ## using the fact that E(x) = \int 1-F(x) dx ... and applying a right Riemann sum
  
  #Generate EDs for all observations 
  survival <- t(sapply(exp.xb, FUN=function(x){S.bl^x}, simplify=TRUE)) 
  expect.duration <- apply(survival, 1, FUN=function(x){
    sum(diff(h[,1])*x[-1])
  })
  
  
  # Generate EDs for new covariate profiles
  x1 <- x2 <- data
  
  # Put together two new independent variable profiles
  x1[ , paste(var.name)] <- val1
  x2[ , paste(var.name)] <- val2
  
  if(is.character(var.name2)){
    x1[ , paste(var.name2)] <- x2[ , paste(var.name2)] <- val3
  } 
  
  if(is.character(var.name3)){
    x1[ , paste(var.name3)] <- val4
    x2[ , paste(var.name3)] <- val5
  }  
  
  exp.xb.1 <- exp(x1%*%coef-mn)
  survival.1 <- t(sapply(exp.xb.1, FUN=function(x){S.bl^x}, simplify=TRUE)) 
  expect.duration.1 <- apply(survival.1, 1, FUN=function(x){
    sum(diff(h[,1])*x[-1])
  })

  exp.xb.2 <- exp(x2%*%coef-mn)
  survival.2 <- t(sapply(exp.xb.2, FUN=function(x){S.bl^x}, simplify=TRUE)) 
  expect.duration.2 <- apply(survival.2, 1, FUN=function(x){
    sum(diff(h[,1])*x[-1])
  })  
    
  dhr <-mean((exp.xb.2 - exp.xb.1)/exp.xb.1)

  ed.pe <- c(median(expect.duration.1), median(expect.duration.2))
  diff.pe <- median(expect.duration.1) - median(expect.duration.2)
  
  toreturn <- list(expect.bl, expect.duration, ed.pe, diff.pe, dhr)
  names(toreturn) <- c("exp.dur", "exp.dur.vector", "ed.pe", "diff.pe", "dhr")
  return(toreturn)
}

### Cox ED-FNP function begins here ###

cox.ed.fnp <- function(cox.model, var.name, val1, val2, m = 1000, ci = .95, cluster = NULL, k = -1, var.name2 = NA, val3 = NA, var.name3 = NA, val4 = NA, val5 = NA){
  require(mgcv)
  require(MASS)
  require(rms)
  tryCatch(source("~/Dropbox/Research/CPH Expected Durations/R Function/bootcov2.R"), error=function(x){source("~/Dropbox/CPH Expected Durations/R Function/bootcov2.R")})
  
## Run the function to obtain PEs
fnp.pe <- fnp(data=model.matrix(cox.model), coef=coef(cox.model), y=cox.model$y[,1], failed=cox.model$y[,2], var.name, val1, val2, var.name2, val3, var.name3, val4 , val5)
exp.dur <- fnp.pe$exp.dur.vector
ed.pe <- fnp.pe$ed.pe
diff.pe <- fnp.pe$diff.pe
dhr.pe <- fnp.pe$dhr
baseline.hazard <- fnp.pe$baseline.hazard
  
## Draw new data via bootstrapping ##
if(length(cluster) > 1){
  boot.model <- bootcov2(cph(Surv(as.numeric(cox.model$y[ , 1]), as.numeric(cox.model$y[ , 2]), type = "right") ~ model.matrix(cox.model), x = TRUE, y = TRUE), maxit = 50, B = m, cluster = cluster)
  boot.betas <- boot.model$boot.Coef
} else{
  boot.model <- bootcov2(cph(Surv(as.numeric(cox.model$y[ , 1]), as.numeric(cox.model$y[ , 2]), type = "right") ~ model.matrix(cox.model), x = TRUE, y = TRUE), maxit = 50, B = m)
  boot.betas <- boot.model$boot.Coef 
}

# Begin loop
i <- 1
exp.dur.bs <- list() # matrix(NA, nrow(boot.model$b.ind), length(exp.dur)) -- needed for cluster bootstrapping, JH 2/13/16
ed.pe.bs <- list() # matrix(NA, nrow(boot.model$b.ind), 2)
diff.pe.bs <- numeric(nrow(boot.betas))
dhr.pe.bs <- numeric(nrow(boot.betas))

while(i <= nrow(boot.betas)){ 
  fnp.bs <- fnp(data=model.matrix(cox.model)[boot.model$b.ind[ , i] , ], coef=boot.betas[i,], y=cox.model$y[boot.model$b.ind[ , i] , 1], 
                failed=cox.model$y[boot.model$b.ind[ , i] , 2], var.name, val1, val2, var.name2, val3, var.name3, val4 , val5)
  exp.dur.bs[[i]] <- na.omit(fnp.bs$exp.dur.vector)
  ed.pe.bs[[i]] <- fnp.bs$ed.pe
  diff.pe.bs[[i]] <- fnp.bs$diff.pe
  dhr.pe.bs[[i]] <- fnp.bs$dhr
  i <- i+1
}

expdur.se <- unlist(lapply(exp.dur.bs, sd))
ed.se <- apply(matrix(unlist(ed.pe.bs), ncol = 2, byrow = TRUE), 2, sd)   
ed.lo <- ed.pe - (qnorm(1 - (1 - ci)/2)*ed.se) #apply(matrix(unlist(ed.pe.bs), ncol = 2, byrow = TRUE), 2, FUN=function(x){quantile(x,probs=.025, na.rm=TRUE)})
ed.hi <- ed.pe + (qnorm(1 - (1 - ci)/2)*ed.se) #apply(matrix(unlist(ed.pe.bs), ncol = 2, byrow = TRUE), 2, FUN=function(x){quantile(x,probs=.975, na.rm=TRUE)})
diff.se <- sd(diff.pe.bs)
diff.lo <- diff.pe - (qnorm(1 - (1 - ci)/2)*diff.se) #quantile(diff.pe.bs, probs=.025, na.rm=TRUE)
diff.hi <- diff.pe + (qnorm(1 - (1 - ci)/2)*diff.se) #quantile(diff.pe.bs, probs=.975, na.rm=TRUE)
dhr.se <- sd(dhr.pe.bs)
dhr.lo <- quantile(dhr.pe.bs, probs=.025, na.rm=TRUE)
dhr.hi <- quantile(dhr.pe.bs, probs=.975, na.rm=TRUE)
  
return(list(y = cox.model$y[,1], failed = cox.model$y[,2], exp.dur = as.vector(exp.dur), 
            expdur.se = as.vector(expdur.se), ed.pe = ed.pe, ed.se = ed.se, ed.lo = ed.lo, 
            ed.hi = ed.hi, diff.pe = diff.pe, diff.se = diff.se, diff.lo = diff.lo, diff.hi = diff.hi, 
            dhr.pe = dhr.pe, dhr.se = dhr.se, dhr.lo = dhr.lo, dhr.hi = dhr.hi, baseline.hazard=baseline.hazard))

}